*********************************************************************************
*** process demographic census data to county level ***
*********************************************************************************

*********************************************************************************
use "$temp/census_raw", clear

* Purpose of this section: build person-level 0/1 indicators, make sure every
* record carries a county code (imputing it from the PUMA where the county is
* not reported), collapse to weighted county-year shares, and fill the
* 2001-2004 gap by linear interpolation.
* Output: $temp/census_county_level, sorted by fips year.

* urban flag *
* Stata treats a missing value as larger than any number, so "metro > 1" on
* its own would flag a record with missing metro as urban. The explicit guard
* codes such records 0, the same way the equality-based race flags below
* already treat missing values.
g urban = metro > 1 & !missing(metro)

* college degree flag *
assert educd != 999 //no missing value
g bac_or_more = educ >= 10 & !missing(educ)

* race flags: pct white, pct black; pct hispanic *
g white = race == 1
g black = race == 2
g hispanic = hispan > 0 & !missing(hispan)

* a county code of 0 is treated as "not reported" and becomes missing so that
* it can be imputed from the PUMA below; records with neither a county nor a
* PUMA cannot be placed and are dropped
recode countyfip(0=.)
drop if countyfip == . & missing(puma)

// impute missing fips codes from puma codes for 2011 and before
// joinby forms every statefip/puma pairing with the crosswalk, so a record
// whose PUMA maps to several counties is duplicated once per county; with the
// default unmatched() setting, records whose PUMA is not in the crosswalk are
// not kept. The update option lets the crosswalk fill the missing countyfip.
preserve
	keep if year <= 2011 & missing(countyfip) & !missing(puma)
	joinby statefip puma using "$datadir/crosswalk/puma2000_2_fips.dta", update
	save "${temp}/puma2000_fips_imputations", replace
restore

// impute missing fips codes from puma codes for 2012 and later
preserve
	keep if year > 2011 & missing(countyfip) & !missing(puma)
	joinby statefip puma using "$datadir/crosswalk/puma2010_2_fips.dta", update
	save "${temp}/puma2010_fips_imputations", replace
restore

// drop if fips codes are missing to avoid duplicates
// (these records come back, with a county attached, from the two files above)
drop if year <= 2011 & missing(countyfip) & !missing(puma)
drop if year > 2011 & missing(countyfip) & !missing(puma)

// add in fips imputations
append using "${temp}/puma2000_fips_imputations"
append using "${temp}/puma2010_fips_imputations"

assert !missing(countyfip)

// recalculate weights if person is assigned to multiple counties
// n_fips is not created in this script; it is expected to arrive with the
// crosswalk files -- presumably the number of counties the PUMA maps to
// (TODO confirm against the crosswalk).
replace perwt = perwt / n_fips if !missing(n_fips)

* collapse to county-year level *
* obs is a constant 1, so after the weighted sum each flag total divided by
* the obs total is the perwt-weighted share of persons with that flag
g obs=1
collapse (sum) urban bac_or_more white black hispanic obs [aw=perwt], by(year statefip countyfip)
foreach v of varlist urban bac_or_more white black hispanic {
	g pct_`v' = `v'/obs
}

* interpolate between years within county if no observations
* tsfill adds an all-missing row for every year absent between a county's
* first and last observed year
egen id = group(statefip countyfip)
tsset id year
tsfill 

sort id year 
foreach v in urban bac_or_more white black hispanic {
	ipolate pct_`v' year , gen(pct_`v'_i) by(id)  
}

* the county identifiers are constant within id, so interpolating them simply
* copies the code into the filled rows
by id: ipolate countyfip year, gen(countyfip_i)
by id: ipolate statefip year, gen(statefip_i)

* only the 2001-2004 rows take the interpolated values
foreach v in pct_urban pct_bac_or_more pct_white pct_black pct_hispanic countyfip statefip { 
	replace `v' = `v'_i if inrange(year, 2001,2004) 
}

drop *_i 

* rows added by tsfill outside 2001-2004 were not filled in and go here
drop if pct_urban==. & pct_bac_or_more==. & pct_white==. 
g fips = statefip*1000 + countyfip 
keep pct* year statefip countyfip fips

sort fips year 
sa "$temp/census_county_level", replace 


*********************************************************************************
*** process property tax payments from acs to county level *** 
*********************************************************************************
* Purpose of this section: convert the bracketed property-tax code into a
* dollar amount, make sure every record carries a county code (imputing it
* from the PUMA where the county is not reported), average to county-year
* level, fill the 2001-2004 gap by interpolation, and merge with the
* demographic shares saved in $temp/census_county_level.
* Output: $datadir/census_county_level.
u "$temp/census_raw", clear
drop if proptx==0
g proptax = 0 if proptx==1

* NOTE(review): this section refers to the tax code both as proptx and as
* proptx99. That only works if proptx is an abbreviation of proptx99 (so it
* needs variable abbreviation switched on and no other variable starting with
* proptx) -- confirm against census_raw.

local j = 0
//use codebook to code in property tax value 
* codes 2-21: $50-wide brackets, coded at 25, 75, ..., 975
forvalues i =2/21 {
	replace proptax = 25 + `j' if proptx==`i'
	local j = `j'+50
}

* shift codes 58-62 down by one before the $100-wide brackets are assigned
* (presumably to close a gap in the codebook numbering -- TODO confirm)
replace proptx99 = proptx99-1 if inrange(proptx99, 58, 62)
local j = 1000
* codes 22-61: $100-wide brackets, coded at 1050, 1150, ..., 4950
forvalues i =22/61 {
	replace proptax = 50 + `j' if proptx==`i'
	local j = `j'+ 100
}

* remaining codes are assigned individually
replace proptax = 5250 if proptx==63
replace proptax = 5750 if proptx==64
replace proptax = 6500 if proptx==65
replace proptax = 7500 if proptx==66
replace proptax = 8500 if proptx==67
replace proptax = 9500 if proptx==68
replace proptax = 11000 if proptx==69


label var proptax "Annual Property Tax (mean, $)"

* a county code of 0 is treated as "not reported" and becomes missing so that
* it can be imputed from the PUMA below; records with neither a county nor a
* PUMA cannot be placed and are dropped
recode countyfip(0=.)
drop if countyfip == . & missing(puma)

// impute missing fips codes from puma codes for 2011 and before
// joinby forms every statefip/puma pairing with the crosswalk, so a record
// whose PUMA maps to several counties is duplicated once per county; with the
// default unmatched() setting, records whose PUMA is not in the crosswalk are
// not kept. The update option lets the crosswalk fill the missing countyfip.
preserve
	keep if year <= 2011 & missing(countyfip) & !missing(puma)
	joinby statefip puma using "$datadir/crosswalk/puma2000_2_fips.dta", update
	save "${temp}/puma2000_fips_imputations", replace
restore

// impute missing fips codes from puma codes for 2012 and later
preserve
	keep if year > 2011 & missing(countyfip) & !missing(puma)
	joinby statefip puma using "$datadir/crosswalk/puma2010_2_fips.dta", update
	save "${temp}/puma2010_fips_imputations", replace
restore

// drop if fips codes are missing to avoid duplicates
// (these records come back, with a county attached, from the two files above)
drop if year <= 2011 & missing(countyfip) & !missing(puma)
drop if year > 2011 & missing(countyfip) & !missing(puma)

// add in fips imputations
append using "${temp}/puma2000_fips_imputations"
append using "${temp}/puma2010_fips_imputations"

assert !missing(countyfip)

// recalculate weights if a record is assigned to multiple counties
// n_fips is not created in this script; it is expected to arrive with the
// crosswalk files -- presumably the number of counties the PUMA maps to
// (TODO confirm against the crosswalk).
replace perwt = perwt / n_fips if !missing(n_fips)
// The county mean below is weighted by hhwt, not perwt, so hhwt must be split
// across the duplicated records as well; otherwise a household copied into
// several counties keeps its full weight in each of them.
replace hhwt = hhwt / n_fips if !missing(n_fips)

* NOTE(review): property tax is a household-level amount; if census_raw holds
* one row per person, larger households get more weight in this mean --
* confirm whether the file should be restricted to one row per household.
collapse (mean) proptax [aw=hhwt], by(year statefip countyfip)

* tsfill adds an all-missing row for every year absent between a county's
* first and last observed year
egen id = group(statefip countyfip)
tsset id year
tsfill 

by id: ipolate proptax year, gen(proptax_i)
* the county identifiers are constant within id, so interpolating them simply
* copies the code into the filled rows
by id: ipolate countyfip year, gen(countyfip_i)
by id: ipolate statefip year, gen(statefip_i)

* only the 2001-2004 rows take the interpolated values
foreach v in proptax countyfip statefip {
	replace `v' = `v'_i if inrange(year, 2001,2004) 
}

drop *_i 

* Rows added by tsfill outside 2001-2004 were not filled in: they have no
* county identifiers and no tax value. Left in place they would get a missing
* fips, end up in the final file, and make the 1:1 merge key non-unique when
* two of them fall in the same year.
drop if missing(statefip) | missing(countyfip)

g fips = statefip*1000 + countyfip 
keep proptax year statefip countyfip fips

merge 1:1 countyfip statefip fips year using "$temp/census_county_level", nogen 

sa "$datadir/census_county_level", replace 

